#<center>

#![Dr. Wayne Stewart](wayne.jpg "My Picture"){ width=20% }

#</center>

1 My Video

#<video width="320" height="240" controls>
#  <source src="usingvideoinrmd.mp4" type="video/mp4">
#Your browser does not support the video tag.
#</video>
# Load the s20x helpers used throughout and read the red-wine data from a
# path relative to the working directory.
library(s20x)
wine <- read.csv("winequality-red.csv")

# Scatter plot of quality against volatile acidity with a smoothed trend.
trendscatter(wine$quality ~ wine$volatile.acidity, f = 0.3, data = wine)

# Simple linear regression of quality on volatile acidity.
# NOTE(review): the formula spells its terms as wine$..., so the data
# argument is not what supplies the columns -- confirm before reusing this
# model with predict(newdata = ...).
wine.fit <- lm(wine$quality ~ wine$volatile.acidity, data = wine)
summary(wine.fit)
## 
## Call:
## lm(formula = wine$quality ~ wine$volatile.acidity, data = wine)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.79071 -0.54411 -0.00687  0.47350  2.93148 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.56575    0.05791  113.39   <2e-16 ***
## wine$volatile.acidity -1.76144    0.10389  -16.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7437 on 1597 degrees of freedom
## Multiple R-squared:  0.1525, Adjusted R-squared:  0.152 
## F-statistic: 287.4 on 1 and 1597 DF,  p-value: < 2.2e-16
# s20x equality-of-variance check for wine.fit (presumably a residuals vs
# fitted plot -- confirm against the s20x documentation).
eovcheck(wine.fit)

# Prints the summary of wine.fit again; the model has not changed since the
# previous summary() call, so the output repeats.
summary(wine.fit)
## 
## Call:
## lm(formula = wine$quality ~ wine$volatile.acidity, data = wine)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.79071 -0.54411 -0.00687  0.47350  2.93148 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.56575    0.05791  113.39   <2e-16 ***
## wine$volatile.acidity -1.76144    0.10389  -16.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7437 on 1597 degrees of freedom
## Multiple R-squared:  0.1525, Adjusted R-squared:  0.152 
## F-statistic: 287.4 on 1 and 1597 DF,  p-value: < 2.2e-16
# Scatter plot of quality against volatile acidity with the fitted line from
# wine.fit drawn on top. The x-axis label names the predictor that is
# actually plotted (it previously read "alcohol").
plot(wine$quality ~ wine$volatile.acidity, main = "Fitted model",
     xlab = "volatile acidity", ylab = "quality", data = wine)
abline(wine.fit)

# 95% confidence intervals for the intercept and slope of wine.fit.
ciReg(wine.fit)
##                       95 % C.I.lower    95 % C.I.upper
## (Intercept)                  6.45217           6.67932
## wine$volatile.acidity       -1.96522          -1.55765
# Diagnostics for wine.fit: first standard lm diagnostic plot, a normality
# check of the residuals with a Shapiro-Wilk test, and the s20x Cook's
# distance plot.
plot(wine.fit, which = 1)

normcheck(wine.fit, shapiro.wilk = TRUE)

cooks20x(wine.fit)

# One simple linear regression of quality per candidate predictor, so the
# fits can be compared through their summaries.
wine.ph <- lm(wine$quality ~ wine$pH, data = wine)
wine.tsd <- lm(wine$quality ~ wine$total.sulfur.dioxide, data = wine)
wine.alcohol <- lm(wine$quality ~ wine$alcohol, data = wine)
wine.va <- lm(wine$quality ~ wine$volatile.acidity, data = wine)
wine.fsd <- lm(wine$quality ~ wine$free.sulfur.dioxide, data = wine)
wine.fa <- lm(wine$quality ~ wine$fixed.acidity, data = wine)
wine.rs <- lm(wine$quality ~ wine$residual.sugar, data = wine)
wine.chl <- lm(wine$quality ~ wine$chlorides, data = wine)
wine.dens <- lm(wine$quality ~ wine$density, data = wine)
wine.ca <- lm(wine$quality ~ wine$citric.acid, data = wine)

# Fit summary for quality regressed on pH.
summary(wine.ph)
## 
## Call:
## lm(formula = wine$quality ~ wine$pH, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6817 -0.6394  0.3032  0.3878  2.4874 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.6359     0.4332  15.320   <2e-16 ***
## wine$pH      -0.3020     0.1307  -2.311    0.021 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8065 on 1597 degrees of freedom
## Multiple R-squared:  0.003333,   Adjusted R-squared:  0.002709 
## F-statistic:  5.34 on 1 and 1597 DF,  p-value: 0.02096
# Fit summary for quality regressed on total sulfur dioxide.
summary(wine.tsd)
## 
## Call:
## lm(formula = wine$quality ~ wine$total.sulfur.dioxide, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8063 -0.6336  0.2164  0.3800  2.5527 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                5.8471792  0.0343670 170.140  < 2e-16 ***
## wine$total.sulfur.dioxide -0.0045442  0.0006037  -7.527 8.62e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7939 on 1597 degrees of freedom
## Multiple R-squared:  0.03426,    Adjusted R-squared:  0.03366 
## F-statistic: 56.66 on 1 and 1597 DF,  p-value: 8.622e-14
# Fit summary for quality regressed on alcohol.
summary(wine.alcohol)
## 
## Call:
## lm(formula = wine$quality ~ wine$alcohol, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8442 -0.4112 -0.1690  0.5166  2.5888 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.87497    0.17471   10.73   <2e-16 ***
## wine$alcohol  0.36084    0.01668   21.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7104 on 1597 degrees of freedom
## Multiple R-squared:  0.2267, Adjusted R-squared:  0.2263 
## F-statistic: 468.3 on 1 and 1597 DF,  p-value: < 2.2e-16
# Fit summary for quality regressed on volatile acidity (same formula as
# wine.fit, so the output matches that model's summary).
summary(wine.va)
## 
## Call:
## lm(formula = wine$quality ~ wine$volatile.acidity, data = wine)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.79071 -0.54411 -0.00687  0.47350  2.93148 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.56575    0.05791  113.39   <2e-16 ***
## wine$volatile.acidity -1.76144    0.10389  -16.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7437 on 1597 degrees of freedom
## Multiple R-squared:  0.1525, Adjusted R-squared:  0.152 
## F-statistic: 287.4 on 1 and 1597 DF,  p-value: < 2.2e-16
# Fit summary for quality regressed on free sulfur dioxide.
summary(wine.fsd)
## 
## Call:
## lm(formula = wine$quality ~ wine$free.sulfur.dioxide, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6864 -0.6394  0.3215  0.3762  2.4661 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               5.698107   0.036678 155.357   <2e-16 ***
## wine$free.sulfur.dioxide -0.003911   0.001929  -2.027   0.0428 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8068 on 1597 degrees of freedom
## Multiple R-squared:  0.002566,   Adjusted R-squared:  0.001941 
## F-statistic: 4.109 on 1 and 1597 DF,  p-value: 0.04283
# Fit summary for quality regressed on fixed acidity.
summary(wine.fa)
## 
## Call:
## lm(formula = wine$quality ~ wine$fixed.acidity, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8248 -0.6061  0.1925  0.4341  2.5550 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         5.15732    0.09789  52.684  < 2e-16 ***
## wine$fixed.acidity  0.05754    0.01152   4.996  6.5e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8016 on 1597 degrees of freedom
## Multiple R-squared:  0.01539,    Adjusted R-squared:  0.01477 
## F-statistic: 24.96 on 1 and 1597 DF,  p-value: 6.496e-07
# Fit summary for quality regressed on residual sugar.
summary(wine.rs)
## 
## Call:
## lm(formula = wine$quality ~ wine$residual.sugar, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6609 -0.6334  0.3580  0.3690  2.3729 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         5.616055   0.041616 134.950   <2e-16 ***
## wine$residual.sugar 0.007865   0.014331   0.549    0.583    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8077 on 1597 degrees of freedom
## Multiple R-squared:  0.0001886,  Adjusted R-squared:  -0.0004375 
## F-statistic: 0.3012 on 1 and 1597 DF,  p-value: 0.5832
# Fit summary for quality regressed on chlorides.
summary(wine.chl)
## 
## Call:
## lm(formula = wine$quality ~ wine$chlorides, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6946 -0.6503  0.3010  0.3607  2.3607 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5.82948    0.04229 137.852  < 2e-16 ***
## wine$chlorides -2.21184    0.42578  -5.195 2.31e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8011 on 1597 degrees of freedom
## Multiple R-squared:  0.01662,    Adjusted R-squared:  0.016 
## F-statistic: 26.99 on 1 and 1597 DF,  p-value: 2.313e-07
# Fit summary for quality regressed on density.
summary(wine.dens)
## 
## Call:
## lm(formula = wine$quality ~ wine$density, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7885 -0.6216  0.1554  0.4271  2.5177 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     80.24      10.51   7.636 3.83e-14 ***
## wine$density   -74.85      10.54  -7.100 1.87e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7954 on 1597 degrees of freedom
## Multiple R-squared:  0.0306, Adjusted R-squared:  0.02999 
## F-statistic: 50.41 on 1 and 1597 DF,  p-value: 1.875e-12
# Fit summary for quality regressed on citric acid.
summary(wine.ca)
## 
## Call:
## lm(formula = wine$quality ~ wine$citric.acid, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0011 -0.5976  0.1021  0.5057  2.5901 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.38172    0.03372 159.610   <2e-16 ***
## wine$citric.acid  0.93845    0.10104   9.288   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7869 on 1597 degrees of freedom
## Multiple R-squared:  0.05124,    Adjusted R-squared:  0.05065 
## F-statistic: 86.26 on 1 and 1597 DF,  p-value: < 2.2e-16
library(d3scatter)
library(crosstalk)
library(leaflet)
library(DT)

# Wrap the wine data in a crosstalk SharedData object so the filter widgets
# and both scatter plots operate on one linked data set.
shared_wine <- SharedData$new(wine)

# Layout: a column of filters (quality checkboxes, sulphates slider)
# followed by two sulphates-vs-quality scatter plots, coloured by pH and by
# quality respectively.
bscols(
  widths = c(5, NA, NA),
  list(
    filter_checkbox("quality", "Quality", shared_wine, ~quality,
                    inline = TRUE),
    filter_slider("sulphates", "Sulphates", shared_wine, ~sulphates,
                  width = "100%")
  ),
  d3scatter(shared_wine, ~sulphates, ~quality, ~factor(pH),
            width = "100%", height = 250),
  d3scatter(shared_wine, ~sulphates, ~quality, ~factor(quality),
            width = "100%", height = 250)
)

2 Introduction

Wine is one of humanity's oldest alcoholic beverages. It has been used recreationally, religiously, and medicinally throughout the millennia. Wine was seen as a gift from the Greek god Dionysos (or Bacchus, if you were Roman) to the people. It was said that the god gifted the vine to a man named Ikarios. Ikarios used this gift to make wine, which he was inclined to share with his fellow people. When he shared the wine, the others were frightened by its effects, thinking that they had been poisoned. They ended up murdering Ikarios, and his creation spread from there.

Dionysos

Although there are still religious rituals that use wine, such as the Christian ritual known as “communion” or the “Eucharist”, it is used far more frequently for recreational purposes. Wine makes up roughly 11.4% of all alcohol sold in America, which comes out to ~ 3.8 billion liters of the fruity beverage.

Alcohol

With that much wine being sold in America alone, companies have plenty of motivation to produce quality wines for people to enjoy. When it comes to the quality of a wine, much is subjective. How do you quantify quality of taste? The wine must be tasted by humans in order to determine the quality. With such subjective testing, it's difficult to determine which properties of the wine people like. In order to try to find a trend in quality, various physicochemical properties have been recorded along with their corresponding quality ratings in this dataset.

2.1 My Interest

I am a college student. Alcohol is well ingrained into the college culture. As a man who is part of said culture, I have had my fair share of alcoholic beverages. The more that my palate has adjusted, the more I have come to appreciate wine. As a huge fan of juice growing up, it just makes sense that I make the gradual transition over to wine. It has quickly become one of my go-to drinks when it is available. With my growing interest in this drink, I have often wondered why some wines make me fall in love, whereas other wines make me fall over. This analysis is an attempt to seek out and identify the factors that contribute to this overall quality.

2.2 Data

This data is pulled from a 2009 study in which researchers at the University of Minho in Portugal attempted to predict taste preferences using physicochemical properties of red wine. The dataset contains 1599 samples with 11 different chemical properties and the quality rating for each sample. The properties measured are the fixed acidity, volatile acidity, citric acid, residual sugar, chlorides, free sulfur dioxide, total sulfur dioxide, density, pH, sulphates, and alcohol. The quality is based on a scale of 0 (worst) to 10 (best).

# Read the red-wine data from the working directory and show its first rows.
wine <- read.csv("winequality-red.csv")
head(wine)

2.3 The Problem to Solve

I will analyze the data in an attempt to find a correlation between the amounts of the compounds in a wine and its overall quality. I do not expect all compounds to make a statistically significant difference in quality, but I am hoping to find at least a few. The results of this analysis will help me and anyone else who reads this make better guesses as to the quality of a wine based on the compounds it contains.

2.4 Preliminary Plots and Interpretation of the Data

There are a lot of factors that could play a role in the quality of the wine. I will show graphs of the wine quality against each compound.

2.4.1 Quality~Fixed Acidity

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.92457, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.2 Quality~Volatile Acidity

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.98876, p-value = 8.727e-10
## `geom_smooth()` using formula 'y ~ x'

2.4.3 Quality~Citric Acid

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.96836, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.4 Quality~Residual Sugar

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.86397, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.5 Quality~Chlorides

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.89555, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.6 Quality~Free Sulfur Dioxide

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.88769, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.7 Quality~Total Sulfur Dioxide

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.94436, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.8 Quality~Density

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.94445, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.9 Quality~pH

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.89281, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.10 Quality~Sulphates

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.95736, p-value < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.11 Quality~Alcohol

## 
##  Shapiro-Wilk normality test
## 
## data:  wine.stder
## W = 0.97576, p-value = 8.737e-16
## `geom_smooth()` using formula 'y ~ x'

2.4.12 Sub sub headings can be useful

Testing1

2.4.13 Plot data

library(ggplot2)
# Template example on the built-in mtcars data: mpg against displacement,
# coloured by cyl, with a loess smoother layered over the points.
g <- ggplot(mtcars, aes(x = disp, y = mpg, color = cyl)) +
  geom_point() +
  geom_smooth(method = "loess")
g
## `geom_smooth()` using formula 'y ~ x'
Graph of data with loess smoother

Graph of data with loess smoother

2.5 How were the data collected?

2.6 What is the story behind the data?

2.7 Why was it gathered?

2.8 What is your interest in the data?

2.8.1 Include pictures ![](jpeg)

2.9 What problem do you wish to solve?

3 Theory behind the Analysis

As you can see in the preliminary graphs, the data is mostly distributed normally, but there isn’t much of a linear trend to analyze. I believe that the quality would drop if too much of any one compound is in the wine. That being said, I will still attempt to find some linear correlation within the data, even if the relationship is minimal. The histograms show that the independent variables have an uneven distribution of the frequency of values tested. This can cause unexpected and faux trends in the data. Even though this may skew some results, I will conveniently ignore those facts in order to produce some sort of regression analysis. I will build a probabilistic simple linear regression model. The model assumes that the mean value of \(y\) for any given value of \(x\) lies on a straight line. Deviations of individual points from that line are represented by \(\epsilon\). The equation of the line is: \[y_i=\beta_0+\beta_1x_i+\epsilon_i\] The terms \(\beta_0\), \(\beta_1\), and \(\epsilon_i\) are as follows: \(\beta_0\) and \(\beta_1\) are fixed but unknown parameters, and \(\epsilon_i\) is the random error. If you take the expected value, \(\epsilon_i\) drops out (its mean is 0) and you get the equation for the mean of \(y\) given any value of \(x\). This can be represented as such: \[E(y_i)=E(\beta_0+\beta_1x_i+\epsilon_i)\] \[=\beta_0+\beta_1x_i+E(\epsilon_i)\] \[=\beta_0+\beta_1x_i\] Given this information, we can represent this as \(E(Y|x)\) where \(\beta_0\) is the y-intercept and \(\beta_1\) is the slope. According to Mendenhall and Sincich (2016), we need to make the following assumptions about \(\epsilon\) in order to estimate the \(\beta\) parameters. These assumptions are:

  • The mean of the probability distribution of \(\epsilon\) is 0.
  • The variance of the probability distribution of \(\epsilon\) is constant for all settings of the independent variable x.
  • The probability distribution of \(\epsilon\) is normal.
  • The error associated with one value of y has no effect on the errors associated with other y values.

Given these assumptions, we should be able to produce some estimates for the \(\beta\) parameters.

4 Estimating the Parameters

Since the data isn’t very linear and there are a lot of independent variables, I will be selecting 2 independent variables that show the best fit based on their multiple \(R^2\) value. The 2 variables with the highest multiple \(R^2\) values are alcohol and volatile acidity.

# Fit summary for quality regressed on alcohol. The model object is named
# wine.alcohol where it is fitted; wine.alc is not defined anywhere in this
# file.
summary(wine.alcohol)
## 
## Call:
## lm(formula = wine$quality ~ wine$alcohol, data = wine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8442 -0.4112 -0.1690  0.5166  2.5888 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.87497    0.17471   10.73   <2e-16 ***
## wine$alcohol  0.36084    0.01668   21.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7104 on 1597 degrees of freedom
## Multiple R-squared:  0.2267, Adjusted R-squared:  0.2263 
## F-statistic: 468.3 on 1 and 1597 DF,  p-value: < 2.2e-16
# Fit summary for quality regressed on volatile acidity.
summary(wine.va)
## 
## Call:
## lm(formula = wine$quality ~ wine$volatile.acidity, data = wine)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.79071 -0.54411 -0.00687  0.47350  2.93148 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.56575    0.05791  113.39   <2e-16 ***
## wine$volatile.acidity -1.76144    0.10389  -16.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7437 on 1597 degrees of freedom
## Multiple R-squared:  0.1525, Adjusted R-squared:  0.152 
## F-statistic: 287.4 on 1 and 1597 DF,  p-value: < 2.2e-16

The multiple \(R^2\) values are 0.2267 for alcohol and 0.1525 for volatile acidity. While very low, they are the best I have to work with. Based on these summaries, we can say that \[\hat{\beta_{0a}}=1.87497\] \[\hat{\beta_{1a}}=0.36084\] and \[\hat{\beta_{0v}}=6.56575\] \[\hat{\beta_{1v}}=-1.76144\]

4.1 Confidence Interval

# 95% confidence intervals for the alcohol model's coefficients. Uses
# wine.alcohol, the name the model is fitted under; wine.alc is not defined
# anywhere in this file.
ciReg(wine.alcohol, conf.level = 0.95, print.out = TRUE)
##              95 % C.I.lower    95 % C.I.upper
## (Intercept)         1.53229           2.21766
## wine$alcohol        0.32813           0.39355
# 95% confidence intervals for the volatile-acidity model's coefficients.
ciReg(wine.va, conf.level = 0.95, print.out = TRUE)
##                       95 % C.I.lower    95 % C.I.upper
## (Intercept)                  6.45217           6.67932
## wine$volatile.acidity       -1.96522          -1.55765

4.2 The Least-Squares Estimates

\[\hat{\beta_{0a}}+\hat{\beta_{1a}}x_i=1.87497+0.36084x_i\] \[\hat{\beta_{0v}}+\hat{\beta_{1v}}x_i=6.56575-1.76144x_i\] The slope \(\hat{\beta_{1a}}=0.36084\) tells us that the estimated quality rises by 0.36084 for every 1-unit increase in alcohol. The slope \(\hat{\beta_{1v}}=-1.76144\) tells us that the estimated quality drops by 1.76144 for every 1-unit increase in volatile acidity.

5 Verifying Assumptions

I will attempt to verify the above assumptions to show whether or not a straight line is the best fit for the model.

# Quality against alcohol with the fitted line. The line comes from
# wine.alcohol, the name the model is fitted under; wine.alc is not defined
# anywhere in this file.
plot(wine$quality ~ wine$alcohol, bg = "Purple", pch = 21, cex = 1.2,
     data = wine)
abline(wine.alcohol)

# Quality against volatile acidity with the fitted line from wine.va.
plot(wine$quality ~ wine$volatile.acidity, bg = "Blue", pch = 21, cex = 1.2,
     data = wine)
abline(wine.va)

These plots do not indicate that a linear model is the best fit for the data.

5.1 Plot Residuals

The residuals tell us by how much points deviate from the fitted line. They help us to see just how far the points are varying from the line.

# Quality against alcohol with a vertical segment from every observation to
# its fitted value, i.e. the residuals of the alcohol model.
# - Uses wine.alcohol, the name the model is fitted under; wine.alc is not
#   defined anywhere in this file.
# - Uses fitted() rather than predict(model, newdata): the model formula is
#   written with wine$... terms, so a newdata frame holding a column named
#   `alcohol` is never consulted and predict() just returns the fitted
#   values anyway.
# - The fitted line is drawn once, after the segments, so it sits on top.
plot(wine$quality ~ wine$alcohol, bg = "Green", pch = 21, cex = 1.2,
     data = wine)
yhat <- fitted(wine.alcohol)
with(wine, segments(alcohol, quality, alcohol, yhat))
abline(wine.alcohol)

# Same residual display for the volatile-acidity model.
plot(wine$quality ~ wine$volatile.acidity, bg = "Maroon", pch = 21,
     cex = 1.2, data = wine)
yhat <- fitted(wine.va)
with(wine, segments(volatile.acidity, quality, volatile.acidity, yhat))
abline(wine.va)

As you can see, there is quite a lot of deviation from the line.

5.2 Checks on validity

5.2.1 Straight trend line

5.2.1.1 Use trendscatter

5.2.2 Errors distributed Normally

\[\epsilon_i \sim N(0,\sigma^2)\]

5.2.2.1 Shapiro-wilk

5.2.3 Constant variance

5.2.3.1 Residual vs fitted values

5.2.3.2 trendscatter on Residual Vs Fitted

5.2.4 Zero mean value of \(\epsilon\)

5.2.5 Independence of data

6 Model selection if you compared models

6.1 Use adjusted \(R^2\)

\[R_{adj}^2 =\]

7 Analysis of the data

7.1 Make sure you include many great plots

7.2 Add the trend to the data

7.3 Summary lm object

7.3.1 Interpretation of all tests

7.3.2 Interpretation of multiple R squared

7.3.3 Interpretation of all point estimates

7.4 Calculate cis for \(\beta\) parameter estimates

7.4.1 Use of predict()

7.4.2 Use of ciReg()

7.4.3 Check on outliers using cooks plots

Remember to interpret this plot and all other plots

8 Conclusion

8.1 Answer your research question

8.2 Suggest ways to improve model or experiment

9 References


  1. A little footnote